package banan

import (
	"fmt"
	"github.com/antchfx/htmlquery"
	"github.com/gocolly/colly"
	"github.com/gogf/gf/v2/crypto/gmd5"
	"github.com/gogf/gf/v2/encoding/gjson"
	"github.com/gogf/gf/v2/os/gctx"
	"github.com/gogf/gf/v2/os/gtime"
	"github.com/gogf/gf/v2/text/gregex"
	"github.com/gogf/gf/v2/text/gstr"
	"strings"
	"uni-crawl-frame/core"
	"uni-crawl-frame/db/mysql/dao"
	"uni-crawl-frame/db/mysql/model/entity"
	"uni-crawl-frame/service/crawl/vodservice"
	"uni-crawl-frame/utils/constant"
	"uni-crawl-frame/utils/httputil"
)

// jsonRegex matches the inline "player_aaaa=" assignment found in a detail
// page and captures the shortest brace-delimited span that follows it.
// crawlTVItem parses that capture as JSON and reads its "url" field.
const jsonRegex = `player_aaaa=({.*?})`

// BananTvCrawl is the crawler implementation for banan TV listing and detail
// pages. It embeds *core.AbstractCrawlByBrowser and adds the UseBrowser and
// FillTargetRequest methods defined in this file.
type BananTvCrawl struct {
	*core.AbstractCrawlByBrowser
}

// UseBrowser always reports false. The crawl functions in this file fetch
// pages with a colly collector rather than a browser.
// NOTE(review): presumably the framework consults this to decide whether to
// start a browser session — confirm against the core package.
func (r *BananTvCrawl) UseBrowser() bool {
	return false
}

// FillTargetRequest dispatches the crawl according to which sub-context is
// populated on ctx:
//   - ctx.CrawlVodCtx set: crawl a listing via crawlTVList;
//   - otherwise, ctx.CrawlTVCtx.VodTV set: crawl one detail page via crawlTVItem.
//
// When neither is set the call is a no-op.
//
// NOTE(review): ctx.CrawlTVCtx is dereferenced without a nil check whenever
// ctx.CrawlVodCtx is nil — confirm the framework always populates it.
func (r BananTvCrawl) FillTargetRequest(ctx *core.ApplicationContext) {
	switch {
	case ctx.CrawlVodCtx != nil:
		crawlTVList(ctx)
	case ctx.CrawlTVCtx.VodTV != nil:
		crawlTVItem(ctx)
	}
}

// getTagByURLID maps a site category id (the number in a "vodtype/<id>.html"
// URL) to the tag name stored on crawled videos.
//
// It returns "" for any id that has no mapping; per the original author's
// note, the backend then adds no tag for that video.
//
// Reference list of tag names left (commented out) by the original author,
// presumably the full vocabulary the backend recognises — verify before
// adding new mappings:
//
//	"三级伦理", "中文字幕", "亚洲有码", "人妻熟女", "制服诱惑", "国产情色", "强奸乱伦",
//	"成人动画", "日本无码", "欧美情色", "精品推荐", "网红主播", "美乳巨乳", "萝莉少女"
func getTagByURLID(id string) string {
	// A switch over the constant keys avoids building a map on every call.
	switch id {
	case "25":
		return "国产情色"
	case "77":
		return "中文字幕"
	case "66", "91":
		return "日本无码"
	case "44":
		return "萝莉少女"
	case "43":
		return "制服诱惑"
	case "90":
		return "成人动画"
	default:
		// Unmapped category: no tag.
		return ""
	}
}
// crawlTVList crawls a category listing and stores every video it finds as a
// CmsCrawlVodTv row in the CrawlTVInit state.
//
// The seed page (ctx.CrawlVodCtx.SeedUrl) is always crawled. When the seed is
// the first page of a "vodtype" listing — its file name either contains no "-"
// or contains "-1.html" — the following pages of the same category are crawled
// too, using URLs of the form <prefix><category>-<page>.html.
//
// ctx.CrawlVodCtx.SeedUrl is only read; page URLs are passed to the helper
// explicitly so the caller's context is not left pointing at the last page.
func crawlTVList(ctx *core.ApplicationContext) {
	seedURL := ctx.CrawlVodCtx.SeedUrl
	crawlTVListPage(ctx, seedURL)

	// Pagination: vodtype/22-1.html, vodtype/22-2.html, vodtype/22-3.html ...
	if !gstr.Contains(seedURL, "vodtype") {
		return
	}
	urlPrefix := httputil.GetBaseUrlByBackslash(seedURL)
	seedURLTail := gstr.Replace(seedURL, urlPrefix, "")

	// Only the first page fans out to the rest; any other page is crawled alone.
	if gstr.Contains(seedURLTail, "-") && !gstr.Contains(seedURLTail, "-1.html") {
		return
	}

	// The first run of digits in the file name is the category id.
	// The pattern is a constant, so the compile error can be ignored.
	rets, _ := gregex.MatchString("\\d+", seedURLTail)
	if len(rets) == 0 {
		return
	}
	seedClass := rets[0]

	// NOTE(review): the loop stops at PageSize-1; confirm whether PageSize is
	// meant to be an inclusive page count.
	for i := 2; i < ctx.CrawlVodCtx.PageSize; i++ {
		crawlTVListPage(ctx, fmt.Sprintf("%v%v-%v.html", urlPrefix, seedClass, i))
	}
}

// crawlTVListPage fetches a single listing page and inserts one CmsCrawlVodTv
// row for every entry whose title link points at a "vodplay" page.
func crawlTVListPage(ctx *core.ApplicationContext, pageURL string) {
	ctx.Log.Infof(gctx.GetInitCtx(),
		"crawl banan TV List . SeedUrl = %v", pageURL)

	// The tag depends only on the category id in the page URL, so resolve it
	// once per page. Only the digits directly after "vodtype/" are captured, so
	// a paginated URL such as ".../vodtype/25-2.html" still yields id "25"
	// instead of "25-2" (which has no tag mapping).
	id := "0"
	if m, _ := gregex.MatchString(`vodtype/(\d+)`, pageURL); len(m) > 1 {
		id = m[1]
	}
	videoTag := getTagByURLID(id)

	coll := colly.NewCollector()
	coll.OnResponse(func(response *colly.Response) {
		parse, err := htmlquery.Parse(strings.NewReader(string(response.Body)))
		if err != nil {
			ctx.Log.Error(gctx.GetInitCtx(), err)
		}
		if parse == nil {
			return
		}

		tvList := htmlquery.Find(parse, "//*[@class='video-img-box mb-e-20']")
		for _, tvItem := range tvList {
			titleNode := htmlquery.FindOne(tvItem, ".//*[@class='title']/a")
			if titleNode == nil {
				// No title link in this box; InnerText must not be called
				// with a nil node.
				continue
			}
			title := htmlquery.InnerText(titleNode)
			href := htmlquery.SelectAttr(titleNode, "href")
			if !gstr.Contains(href, "vodplay") {
				continue
			}

			imgNode := htmlquery.FindOne(tvItem, ".//img")
			imgUrl := htmlquery.SelectAttr(imgNode, "data-src")

			// Turn site-relative links into absolute URLs.
			if !gstr.HasPrefix(href, "http") {
				href = httputil.GetBaseUrlBySchema(pageURL) + href
			}
			if !gstr.HasPrefix(imgUrl, "http") {
				imgUrl = httputil.GetBaseUrlBySchema(pageURL) + imgUrl
			}

			ctx.Log.Infof(gctx.GetInitCtx(),
				"title = %v, href = %v", title, href)

			tv := new(entity.CmsCrawlVodTv)
			tv.VideoTag = videoTag
			tv.CrawlStatus = vodservice.CrawlTVInit
			tv.VodConfigId = ctx.CrawlVodCtx.VodConfigId
			tv.VodMd5 = gmd5.MustEncryptString(href)
			tv.VideoName = title
			tv.SeedUrl = href
			tv.VideoIcon = imgUrl
			tv.CreateTime = gtime.Now()
			// The insert result is deliberately ignored, as in the original code.
			// NOTE(review): presumably re-crawled videos are rejected by a unique
			// key on the MD5 column and that is expected — confirm against the
			// table schema.
			_, _ = dao.CmsCrawlVodTv.Ctx(gctx.GetInitCtx()).Insert(tv)
		}
	})

	if err := coll.Visit(pageURL); err != nil {
		ctx.Log.Error(gctx.GetInitCtx(), err)
	}
}

// crawlTVItem fetches the detail page of ctx.CrawlTVCtx.VodTV and extracts the
// data needed to play it.
//
// From the response it reads:
//   - the title element under class "info-header", used only as a check that
//     the page has the expected content;
//   - the "url" field of the JSON captured by jsonRegex, logged as the m3u8 URL;
//   - optionally the first run of digits in the "mr-3" element, stored as
//     VideoYear.
//
// If the title or the URL is missing, ErrorCnt is incremented and the record
// is set back to CrawlTVInit, or to CrawlTVPadInfoErr once ErrorCnt reaches
// constant.ServerMaxRetry. Otherwise the record is set to CrawlTVPadInfoOK
// and, unless an item with the same MD5 already exists, a single
// CmsCrawlVodTvItem carrying the URL in SeedParams is inserted.
func crawlTVItem(ctx *core.ApplicationContext) {
	crawlTVDto := ctx.CrawlTVCtx.VodTV
	ctx.Log.Infof(gctx.GetInitCtx(),
		"pad info. id = %v, SeedUrl = %v", crawlTVDto.Id, crawlTVDto.SeedUrl)

	coll := colly.NewCollector()
	coll.OnResponse(func(response *colly.Response) {
		body := string(response.Body)

		parse, err := htmlquery.Parse(strings.NewReader(body))
		if err != nil {
			ctx.Log.Error(gctx.GetInitCtx(), err)
		}

		// A failed parse leaves the title empty, which sends the record through
		// the retry path below instead of querying a nil document.
		title := ""
		if parse != nil {
			titleNode := htmlquery.FindOne(parse, "//*[@class='info-header']//*[@class='title']")
			if titleNode != nil {
				title = htmlquery.InnerText(titleNode)
			}
		}

		m3u8URL := ""
		if rets, _ := gregex.MatchString(jsonRegex, body); len(rets) > 1 {
			playerAaaa := gjson.New(rets[1])
			m3u8URL = playerAaaa.Get("url").String()
			ctx.Log.Infof(gctx.GetInitCtx(),
				"Banan.TV m3u8Url = %s", m3u8URL)
		}

		if title == "" || m3u8URL == "" {
			crawlTVDto.ErrorCnt += 1
			if crawlTVDto.ErrorCnt >= constant.ServerMaxRetry {
				// Retries exhausted: mark the record as failed.
				ctx.Log.Infof(gctx.GetInitCtx(),
					"pad info update vod id = %v, to status = %v", crawlTVDto.Id, vodservice.CrawlTVPadInfoErr)
				vodservice.UpdateVodTVStatus(crawlTVDto, vodservice.CrawlTVPadInfoErr)
			} else {
				// Put the record back to its initial state so it is tried again.
				ctx.Log.Infof(gctx.GetInitCtx(),
					"pad info. update vod id = %v, to status = %v", crawlTVDto.Id, vodservice.CrawlTVInit)
				vodservice.UpdateVodTVStatus(crawlTVDto, vodservice.CrawlTVInit)
			}
			return
		}

		// A non-empty title implies parse is non-nil from here on.
		yearNode := htmlquery.FindOne(parse, "//*[@class='info-header']//*[@class='mr-3']")
		if yearNode != nil {
			years, _ := gregex.MatchString("\\d+", htmlquery.InnerText(yearNode))
			if len(years) > 0 {
				crawlTVDto.VideoYear = years[0]
			}
		}
		ctx.Log.Infof(gctx.GetInitCtx(),
			"pad info. update vod id = %v, to status = %v", crawlTVDto.Id, vodservice.CrawlTVPadInfoOK)

		vodservice.UpdateVodTVStatus(crawlTVDto, vodservice.CrawlTVPadInfoOK)

		if vodservice.GetVodTvItemByMd5(crawlTVDto.VodMd5) == nil {
			vodTvItem := new(entity.CmsCrawlVodTvItem)
			vodTvItem.CreateTime = gtime.Now()
			vodTvItem.TvId = crawlTVDto.Id
			vodTvItem.TvItemMd5 = crawlTVDto.VodMd5
			vodTvItem.CrawlStatus = vodservice.CrawlTVItemInit
			vodTvItem.SeedUrl = crawlTVDto.SeedUrl
			vodTvItem.Episodes = "1" // There is only one episode.

			// Store the m3u8 URL directly in the item.
			vodTvItem.SeedParams = m3u8URL
			// The existence check above means an error here is not an expected
			// duplicate, so surface it in the log.
			if _, err := dao.CmsCrawlVodTvItem.Ctx(gctx.GetInitCtx()).Insert(vodTvItem); err != nil {
				ctx.Log.Error(gctx.GetInitCtx(), err)
			}
		}
	})

	if err := coll.Visit(crawlTVDto.SeedUrl); err != nil {
		ctx.Log.Error(gctx.GetInitCtx(), err)
	}
}
